# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)
library(sandwich)
library(reshape)

# start from an empty workspace
rm(list = ls())

# monthly style-level panel: rating, fund flows, and returns
# (flow and ret are multiplied by 100)
data <- readRDS('input_data/morningstar_style_data.RDS')
data <- data[, list(yyyymm, category, rating_1,
                    flow = 100 * flow, ret = 100 * ret)]
# reshape to long format: one row per yyyymm x category x variable
data <- as.data.table(melt(data, id.vars = c('yyyymm', 'category')))
setnames(data, 3:4, c('var', 'value'))

# get volatility and dispersion for these variables
# step 1: rank the rows within each (yyyymm, var) cell by value, so that
# bin 1 holds the smallest value and bin 9 the largest
data <- data[order(yyyymm, var, value)]

# the spread computed below reads bins 1 and 9 as the two extremes, which is
# only valid when every (yyyymm, var) cell has exactly nine rows
cell_sizes <- data[, .N, by = list(yyyymm, var)]
if (cell_sizes[, any(N != 9L)]) {
  stop("expected exactly 9 rows in every (yyyymm, var) cell", call. = FALSE)
}
rm(cell_sizes)

# a single grouped assignment replaces the nested loop, which re-scanned the
# whole table once per (yyyymm, var) pair
data[, bin := seq_len(.N), by = list(yyyymm, var)]

# dispersion: value in the top bin (9) minus value in the bottom bin (1)
extremes <- merge(
  data[bin == 9, list(yyyymm, var, top = value)],
  data[bin == 1, list(yyyymm, var, bottom = value)],
  by = c('yyyymm', 'var')
)
extremes[, spread := top - bottom]
extremes[, c('top', 'bottom') := NULL]

# volatility: standard deviation of value within each (yyyymm, var) cell
vol <- data[, list(stdDev = sd(value)), by = list(yyyymm, var)]
data <- merge(extremes, vol, by = c('yyyymm', 'var'))
rm(extremes, vol)

# stack the two measures into long format and flag rows with yyyymm > 200206
data <- as.data.table(melt(data, id.vars = c('yyyymm', 'var')))
setnames(data, 3, 'measure')
data[, after_2002 := ifelse(yyyymm > 200206, 'Yes', 'No')]

# prefix each variable name with a number so the outputs sort in this order
relabel <- c(rating_1 = '1. rating', flow = '2. flow', ret = '3. return')
for (old_name in names(relabel)) {
  data[var == old_name, var := relabel[[old_name]]]
}
rm(relabel, old_name)

# one output label per (variable, measure) pair, sorted by variable then measure
tmp <- unique(data[, list(var, measure)])[order(var, measure)]
tmp[, var_output := paste0(var, ' ', measure)]
data <- merge(data, tmp, by = c('var', 'measure'))

# keep the full sample so every subsample below can be cut from it
data_bk <- copy(data)


# Regress one output series on the after_2002 indicator and report the slope
# together with its Newey-West standard error.
#
# Args:
#   this: a single `var_output` label selecting the series to use.
#   dt:   table with columns `var_output`, `value` and `after_2002`. Defaults
#         to the script-level `data`, so one-argument calls behave as before.
#
# Returns:
#   A two-row data.table with columns `var_output`, `type` ('coef', 's.e.')
#   and `value` (rounded to two decimals).
p.estimate_one <- function(this, dt = data) {
  # subsetting already yields a fresh table, so no extra copy() is needed
  sample_dt <- dt[var_output == this]
  if (nrow(sample_dt) == 0L) {
    stop("no rows with var_output == '", this, "'", call. = FALSE)
  }

  fit <- lm(value ~ after_2002, data = sample_dt)

  # coef() instead of fit$coef: the latter only works through partial
  # matching of `$` against the `coefficients` element
  slope <- coef(fit)[2]
  nw_se <- sqrt(NeweyWest(fit)[2, 2])

  data.table(
    var_output = this,
    type = c('coef', 's.e.'),
    value = round(c(slope, nw_se), 2)
  )
}

# row 1 of the output: full sample
data <- copy(data_bk)
out <- rbindlist(lapply(tmp$var_output, p.estimate_one))
cast(out, type ~ var_output)

# rows of the full sample that fall inside 2000Q3–2004Q2
in_window <- data_bk[, (yyyymm >= 200007) & (yyyymm <= 200406)]

# row 2 of the output: 2000Q3–2004Q2 only
data <- copy(data_bk[in_window])
out <- rbindlist(lapply(tmp$var_output, p.estimate_one))
cast(out, type ~ var_output)

# row 3 of the output: everything except 2000Q3–2004Q2
data <- copy(data_bk[!in_window])
out <- rbindlist(lapply(tmp$var_output, p.estimate_one))
cast(out, type ~ var_output)
rm(in_window)


